First I need to load up the packages I’ll need

library(sf)
## Linking to GEOS 3.4.2, GDAL 2.1.2, proj.4 4.9.1
library(ggplot2) #development version!
## devtools::install_github("tidyverse/ggplot2")
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(readr)
## Not sure about this bit
library("tidyverse", lib.loc="/Library/Frameworks/R.framework/Versions/3.4/Resources/library")

Now I import my data. I filter for the Arran postcodes, (since Arran all begins ‘KA27’).

## Finding the Arran coordinates
library(dplyr)
allcoordinates <- read.csv("alldata/ukpostcodes.csv")
arrancoordinates <- filter(allcoordinates,substr(postcode,1,4)=="KA27")

Now I plot these coordinates.

## Plotting the Arran coordinates
ggplot(data = arrancoordinates) +
  geom_point(mapping = aes(x = longitude, y = latitude)) +
  ggtitle("Arran Postcodes") +
  labs(title = "Arran Postcodes", x = "Longitude", y = "Latitude") +
  theme(plot.title = element_text(hjust = 0.5))

Now I create some plots.

pcs <- read_sf("alldata/Scotland_pcs_2011")

#Print Post codes lists
arransubsect <- filter(pcs,substr(label,1,4)=="KA27")
arransubsect %>%
  ggplot() +
  geom_sf() +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

arransubsect %>%
ggplot() +
  geom_sf(aes(fill = name)) +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

Now I have to load up a new function.

#Multiplot code
#Multiplot code
#http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols:   Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

Then I can load the shape files.

#Import SIMD data from http://www.gov.scot/Topics/Statistics/SIMD
#The "new data zone boundaries with SIMD16 ranks (zipped shapefile)"
#'2011 Data Zone boundaries'

DZBoundaries2016 <- read_sf("./alldata/SG_SIMD_2016")

#https://data.gov.uk/dataset/scottish-index-of-multiple-deprivation-simd-2012
#https://data.gov.uk/dataset/scottish-index-of-multiple-deprivation-simd-2012/resource/d6fa8924-83da-4e80-a560-4ef0477f230b
DZBoundaries2012 <- read_sf("./alldata/SG_SIMD_2012")
DZBoundaries2009 <- read_sf("./alldata/SG_SIMD_2009")
DZBoundaries2006 <- read_sf("./alldata/SG_SIMD_2006")
DZBoundaries2004 <- read_sf("./alldata/SG_SIMD_2004")

Then (having already downloaded it), I can load the SIMD data.

#Look at data from 2016
SIMD2016 <-read.csv("./alldata/00505244.csv")
SIMD20162 <-read_sf("./alldata/SG_SIMD_2016")

#Look at data from 2012
SIMD2012 <- readxl::read_excel("./alldata/SIMD2012/00410770.xls")
SIMD20122 <- readxl::read_excel("./alldata/SIMD2012/00416552.xls")

#Look at data from 2009
SIMD2009 <- readxl::read_excel("./alldata/SIMD2009/0096578.xls")
SIMD20092 <- readxl::read_excel("./alldata/SIMD2009/0097806.xls")

#Look at data from 2006
# 2009 data - SIMD2006 <- readxl::read_excel("./alldata/SIMD2006/0096578.xls")
SIMD20062 <- readxl::read_excel("./alldata/SIMD2006/0097880.xls")

#Look at data from 2004
SIMD2004 <- readxl::read_excel("./alldata/SIMD2004/0027003.xls")

I have to choose the right columns manually in order to select the Arran data.

#Selecting ArranDZ2016
Arrandz <- c(4672,4666,4669,4671,4667,4668,4670)

#Health domain rank
#2016
arran2016 <- SIMD20162[Arrandz,]

#Find postcode look-up, KA27 postcodes. Find unique DZ. Find row positions.

#Selecting ArranDZ2012
Arrandz2012 <- c(4409,4372,4353,4352,4351,4350,4349)

#2012
arran2012 <- DZBoundaries2012[Arrandz2012,]
#2009
arran2009 <- DZBoundaries2009[Arrandz2012,]
#2006
arran2006 <- DZBoundaries2006[Arrandz2012,]
#2004
arran2004 <- DZBoundaries2004[Arrandz2012,]

The reason I’ve downloaded all the datazones shapefiles individually (three steps up), is because they change between 2016 and 2012. See below.

arran20162 <- arran2016 %>%
  select(DataZone, geometry, Percentile)  %>%
  mutate(year="2016")

arran20122 <- arran2012 %>%
  select(DataZone, geometry, Percentile) %>%
  mutate(year="2012")
arran1612 <- rbind(arran20162,arran20122)
arran1612 %>%
  ggplot() +
  geom_sf(aes(fill = DataZone)) +
  facet_wrap('year') +
  theme(legend.position="none") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

Now I want to plot all the data, first I combine it all into one table.

arran20092 <- arran2009 %>%
  select(DataZone, geometry, Percentile) %>%
  mutate(year="2009")

arran20062 <- arran2006 %>%
  select(DataZone, geometry, Percentile) %>%
  mutate(year="2006")

arran20042 <- arran2004 %>%
  select(DataZone, geometry, Percentile) %>%
  mutate(year="2004")
arransimd <- rbind(arran20162,arran20122,arran20092,arran20062,arran20042)

Then I plot the data zones to look at them. (It looks like the only change was between 2012 and 2016).

arransimd %>%
  ggplot() +
  geom_sf(aes(fill = DataZone)) +
  facet_wrap('year') +
  theme(legend.position="none") +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

Now I plot the percentiles.

arransimd %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

There we are. Not the SIMD health percentiles of Arran zones throughout SIMD history. And I’ve learned a little bit about graphics in R.

If I wanted to I could show the zones individually..

First I find the unique zones. (There are 14. 7 Zones 2016, 7 Zones pre-2016)

datazones <- unique(arransimd$DataZone)

I’ll have to find out a simpler way to do this but..

Pre-2016

S01004409 <- filter(arransimd, DataZone=="S01004409")

S01004409 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01004372 <- filter(arransimd, DataZone=="S01004372")

S01004372 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01004353 <- filter(arransimd, DataZone=="S01004353")

S01004353 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01004352 <- filter(arransimd, DataZone=="S01004352")

S01004352 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01004351 <- filter(arransimd, DataZone=="S01004351")

S01004351 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01004350 <- filter(arransimd, DataZone=="S01004350")

S01004350 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01004349 <- filter(arransimd, DataZone=="S01004349")

S01004349 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

2016

S01011177 <- filter(arransimd, DataZone=="S01011177")

S01011171 <- filter(arransimd, DataZone=="S01011171")

S01011174 <- filter(arransimd, DataZone=="S01011174")

S01011176 <- filter(arransimd, DataZone=="S01011176")

S01011172 <- filter(arransimd, DataZone=="S01011172")

S01011173 <- filter(arransimd, DataZone=="S01011173")

S01011175 <- filter(arransimd, DataZone=="S01011175")
S01011177 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01011171 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01011174 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01011176 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01011172 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01011173 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())

S01011175 %>%
  ggplot() +
  geom_sf(aes(fill = Percentile)) +
  facet_wrap('year') +
  theme(axis.text.x=element_blank(),
        axis.ticks.x=element_blank())